**************************************************************
* This file constructs Bartik firm-level trade shocks
**************************************************************
* Trade costs are based on US Imports of Merchandise,
* World import demand based on Comtrade data.
*
* Merge these sources with firm-level trade data (UDHI)
* and construct firm-level Bartik shocks 
**************************************************************

clear all
set more off

* Prepare trade costs
***********************************************
do "${do_dir}\Prepare_tradecosts.do"

* Prepare HS6 mapping
***********************************************
do "${do_dir}\Prepare_productcodes.do"


* Main Setup: Bartik trade shocks
***********************************************

* Start with raw trade data from 1998 onwards (UHDI)
cd "${data_dir}"
use "tradedata.dta", clear

rename land iso2

* some minor aggregation to prevent missing data:
replace iso2 = "CS" if iso2 == "XS" | iso2 == "XK" | iso2 == "YU"
* Montenegro ME, Serbia XS, Kosovo XK: replace Serbia & Mont CS
replace iso2 = "CH" if iso2 == "LI"
* Liechtenstein LI: use Switzerland CH
replace iso2 = "ES" if iso2 == "XC" | iso2 == "XL"
* Ceuta XC, Melilla XL: use Spain ES
replace iso2 = "IL" if iso2 == "PS"
* Gaza PS: use Israel IL

collapse (sum) vgt mgd vrd, by(aar cvrnr ie iso2 vare)

label var vgt "weight"
label var mgd "units"
label var vrd "value DKK"

**************************************************************
* Add HS1988/92 concordance, aggregate data to 6-digits
**************************************************************

gen productcode = substr(vare,1,6)
sort aar productcode
merge n:1 aar productcode using firmtrade_HS1986_table
tab aar _merge
keep if _merge == 3
drop _merge

keep aar cvrnr ie iso2 h0_code vrd vgt
order aar cvrnr ie iso2 h0_code vrd vgt
destring aar, replace
drop if aar < 1998
drop if aar > 2008
tostring aar, replace

collapse (sum) vrd vgt, by(aar cvrnr ie iso2 h0_code)

* generate pre-sample trade flows for year 1998 or first year observed

* determine first exporting and importing year for each firm:
sort cvrnr ie aar
by cvrnr ie: gen help_first = aar if _n==1
destring help_first, replace
by cvrnr ie: egen first = max(help_first)
* similarly for the last year before exiting
by cvrnr ie: gen help_last = aar if _n==_N
destring help_last, replace
by cvrnr ie: egen last = max(help_last)
tostring first, replace
* Presample = First Year
keep if aar == first
tab aar

reshape wide vrd vgt, i(cvrnr ie first last iso2 h0_code) j(aar) string
reshape long vrd vgt, i(cvrnr ie first last iso2 h0_code) j(aar) string

destring first, replace
destring aar, replace
bys cvrnr ie: drop if aar < first
bys cvrnr ie: drop if aar > last

* Compute trade shares for presample period and save for all years
bys cvrnr ie iso2 h0_code: egen help_vrd = max(vrd)
bys cvrnr ie iso2 h0_code: egen help_vgt = max(vgt)
bys aar cvrnr ie: egen totaltrade = total(help_vrd)
gen share = help_vrd/totaltrade
replace vrd = help_vrd
replace vgt = help_vgt

tostring aar, replace
drop if aar == "1998" 

preserve
* exports: find world import demand
keep if ie == "2"
keep aar iso2 h0_code
bys aar iso2 h0_code: keep if _n == 1
save firmtrade_presample_exports, replace
restore

save firmtrade_presample, replace

*************************************************************************
* Prepare UN Comtrade data:
*************************************************************************
* Imports_Comtrade.dta contains world import demand by country-product,
* defined as a county's total annual import value for a given HS6 product    
* from the world market excluding the import value from Denmark
* using COMTRADE bilateral trade flows 1999-2008
*************************************************************************
use Imports_Comtrade, clear

* product categories found at HS6 level:
merge 1:1 aar iso2 h0_code using firmtrade_presample_exports

preserve
keep if _merge == 3
drop _merge
keep aar iso2 h0_code importdemand
sort aar iso2 h0_code
save found_firmtrade6, replace
restore

* to cover all product categories, I aggregate to higher HS level if necessary
keep if _merge == 2
drop if h0_code == ""
keep aar iso2 h0_code
gen hs4 = substr(h0_code,1,4)
bys aar iso2 hs4: keep if _n == 1
save missing_firmtrade4, replace

use Imports_Comtrade, clear
gen hs4 = substr(h0_code, 1,4)
collapse (mean) importdemand, by(aar iso2 hs4)

* additional product categories found at HS4 level:
merge 1:1 aar iso2 hs4 using missing_firmtrade4

preserve
keep if _merge == 3
drop _merge
keep aar iso2 h0_code importdemand
sort aar iso2 h0_code
save found_firmtrade4, replace
restore

keep if _merge ==2 
drop _merge
keep aar iso2 h0_code
gen hs2 = substr(h0_code,1,2)
bys aar iso2 hs2: keep if _n == 1
save missing_firmtrade2, replace


use Imports_Comtrade, clear
gen hs2 = substr(h0_code, 1,2)
collapse (mean) importdemand, by(aar iso2 hs2)

merge 1:1 aar iso2 hs2 using missing_firmtrade2

preserve
keep if _merge == 3
drop _merge
keep aar iso2 h0_code importdemand
sort aar iso2 h0_code
save found_firmtrade2, replace
restore


* Remaining missing trade flows cannot be used in constructing the instrument
use found_firmtrade6, clear
append using found_firmtrade4
append using found_firmtrade2

drop if h0_code == ""
drop if importdemand <= 0 | importdemand == .

keep aar iso2 h0_code importdemand
order aar iso2 h0_code importdemand
sort aar iso2 h0_code
save importdemand_firmtrade, replace

****************************************************
*** Combine presample and instruments:
****************************************************

use firmtrade_presample, clear

merge n:1 aar iso2 h0_code using importdemand_firmtrade
tab aar _merge
tab aar if h0_code == ""
drop _merge
replace importdemand = . if ie == "1"


merge n:1 iso2 using distance

preserve 
keep if _merge == 1
bys iso2: keep if _n == 1
list iso2
restore 

* manually add missing values
replace dist = 6246 if iso2 == "CD"
replace dist = 13031 if iso2 == "AQ" | iso2 == "GS"
replace dist = 15427 if iso2 == "AS"
replace dist = 11146 if iso2 == "GU"
replace dist = 1550 if iso2 == "ME"
replace dist = 12854 if iso2 == "TF"
replace dist = 12237 if iso2 == "TL"
replace dist = 11358 if iso2 == "UM"
replace dist = 1533 if iso2 == "VA"
replace dist = 7454 if iso2 == "VI"
replace dist = 8303 if iso2 == "YT"
replace dist = 9027 if iso2 == "IO"

drop _merge

merge n:1 aar iso2 using exchange_rates

tab aar _merge
drop _merge

merge n:1 aar using oilprices

drop _merge

save firmdata_instruments_presample, replace


****************************************************
*** Compute firm-level shocks
**************************************************** 
set more off
use firmdata_instruments_presample, clear
drop if cvrnr == ""
rename iso2 land 
gen h0_hs4 = substr(h0_code,1,4)

* Price Index
gen CPI = 0
* base year is 2000
replace CPI = 1581/1668 if aar == "1998"
replace CPI = 1620/1668 if aar == "1999"
replace CPI = 1668/1668 if aar == "2000"
replace CPI = 1707/1668 if aar == "2001"
replace CPI = 1748/1668 if aar == "2002"
replace CPI = 1785/1668 if aar == "2003"
replace CPI = 1805/1668 if aar == "2004"
replace CPI = 1838/1668 if aar == "2005"
replace CPI = 1873/1668 if aar == "2006"
replace CPI = 1905/1668 if aar == "2007"
replace CPI = 1970/1668 if aar == "2008"

* Danish currency relative to USD
gen DKKUSD = DKK_exrate if iso3 == "USA"
bys aar: egen USDrate = max(DKKUSD)

* Data normalization: in DKK
replace importdemand = importdemand/(CPI*USDrate)
replace vrd = vrd/CPI
replace brent = brent/(CPI*USDrate)
replace wti = wti/(CPI*USDrate)
replace kerosene = kerosene/(CPI*USDrate)

* drop initial year:
tostring first, replace
drop if aar == first
drop if ie == ""

* (A) WID
***************************

gen countID = importdemand != .
bys cvrnr ie land h0_code: egen yearsID = total(countID)
gen share_help = share if years == yearsID & yearsID != . & yearsID != 0
bys cvrnr ie aar: egen totalshare = total(share_help)

* relative weight of each product-destination in presample portfolio of firms
gen vareshare_WID = share_help / totalshare if ie == "2"

* weighted import demand by firm
gen weighted_WID = vareshare_WID*log(importdemand)
bys cvrnr aar: egen I_WES = total(weighted_WES)
by cvrnr aar: egen I_WID = total(weighted_WID)


* (B) Transport Costs
***************************

* Input: oil prices, distance, weight-per-value, EU accession
replace brent = log(brent)
replace wti = log(wti)
gen wpv = vgt/vrd

* Generate dummy for new EU members
destring aar, gen(accession)
rename land iso2
gen neweumember = (iso2 == "CY" | iso2 == "CZ" | iso2 == "EE" | iso2 == "HU" ///
| iso2 == "LV" | iso2 == "MT" | iso2 == "PL" | iso2 == "SI" | iso2 == "SK" ) & accession > 2003
rename iso2 land

* Load estimates from US Imports of Merchandise
gen h0_hs2 = substr(h0_code,1,2)
sort h0_hs2

destring h0_hs2, replace
merge n:1 h0_hs2 using transport_results_all
tab aar _merge
drop if _merge == 2
drop _merge 

* Compute transport cost proxy according to the estimation results from US data:

gen transport_f = hs2groupcoef_F + brent*oilcoef_F ///
	+ log(dist)*distcoef_F + brent*log(dist)*distoilcoef_F

gen transport_l = neweumember*neweu_L + hs2groupcoef_L + brent*oilcoef_L ///
	+ log(dist)*distcoef_L + brent*log(dist)*distoilcoef_L
	
gen transport_b = hs2groupcoef_B + log(wpv)*wpvcoef_B + brent*oilcoef_B ///
	+ log(dist)*distcoef_B + brent*log(dist)*distoilcoef_B ///
	+ log(wpv)*log(dist)*wpvdistcoef_B + brent*log(wpv)*wpvoilcoef_B ///
	+ log(wpv)*brent*log(dist)*wpvdistoilcoef_B

gen transport_h = neweumember*neweu_H + hs2groupcoef_H + log(wpv)*wpvcoef_H ///
	+ brent*oilcoef_H + log(dist)*distcoef_H + brent*log(dist)*distoilcoef_H ///
	+ log(wpv)*log(dist)*wpvdistcoef_H + brent*log(wpv)*wpvoilcoef_H ///
	+ log(wpv)*brent*log(dist)*wpvdistoilcoef_H

	
* Now construct firm-level measure based on presample weights:

* Model f/l is for all product groups, only missing distance for few countries:
bys cvrnr ie land aar: gen evald = 1 if _n == 1 & dist != .
bys cvrnr ie land: egen yearsd = total(evald)
drop countryshare_help totalshare
gen countryshare_help = share if years == yearsd
bys cvrnr ie aar: egen totalshare = total(countryshare_help)
gen countryshare_d = countryshare_help / totalshare

* Model b/h requires wpv by hs2 (fixed for the presample!)
bys cvrnr ie land h0_hs2 aar: gen countwpv = wpv != . & hs2groupcoef_a != . & _n == 1
bys cvrnr ie land h0_hs2: egen yearswpv = total(countwpv)
cap drop share_help
gen share_help = share if years == yearswpv & yearswpv != . & yearswpv != 0
cap drop totalshare
bys cvrnr ie aar: egen totalshare = total(share_help)
gen vareshare_wpv = share_help / totalshare

foreach val in f l {
gen weighted_transport_`val' = countryshare_d*transport_`val'
bys cvrnr aar ie: egen I_transport_`val' = total(weighted_transport_`val')
}
foreach val in b h {
gen weighted_transport_`val' = vareshare_wpv*transport_`val'
bys cvrnr aar ie: egen I_transport_`val' = total(weighted_transport_`val')
}


* Save firm-level instruments by export/import:
***************************************************
bys cvrnr ie aar: gen evalf = _n==1
keep if evalf == 1
keep I_WID I_transport* cvrnr aar ie
reshape wide I_transport*, i(cvrnr aar I_WID) j(ie) string

foreach x in I_WID I_transport_b1 I_transport_b2 I_transport_f1 I_transport_f2 ///
I_transport_h1 I_transport_h2 I_transport_l1 I_transport_l2 {
replace `x' = . if `x' == 0
}

* Trade costs on a scale from 0 to 100
replace I_transport_f2 = 100*I_transport_f2
replace I_transport_b2 = 100*I_transport_b2
replace I_transport_l2 = 100*I_transport_l2
replace I_transport_h2 = 100*I_transport_h2

sort aar cvrnr
save bartik_instruments.dta, replace


*******************************************************************
* Repeat for 1000 most important product-destinations 
*******************************************************************

*******************************************************************
* Find 1000 most important product-destinations among exporters 
* in the sample in their first year (determining Bartik shares)
*******************************************************************

set more off
use firmdata_instruments_presample, clear
drop if cvrnr == ""

rename iso2 land 

tab first
tostring first, replace

* Price Index
gen CPI = 0
* base year is 2000
replace CPI = 1581/1668 if first == "1998"
replace CPI = 1620/1668 if first == "1999"
replace CPI = 1668/1668 if first == "2000"
replace CPI = 1707/1668 if first == "2001"
replace CPI = 1748/1668 if first == "2002"
replace CPI = 1785/1668 if first == "2003"
replace CPI = 1805/1668 if first == "2004"
replace CPI = 1838/1668 if first == "2005"
replace CPI = 1873/1668 if first == "2006"
replace CPI = 1905/1668 if first == "2007"
replace CPI = 1970/1668 if first == "2008"

replace vrd = vrd / CPI

merge n:1 cvrnr using firmsample, keepusing(sample)
keep if sample == 1
drop if _merge == 2
drop _merge

keep if ie == "2"
bys cvrnr land h0_code: keep if _n==1
rename land iso2

* Should I drop unknown quantities and condition on those pairs
* for which I can build a match?
keep if importdemand > 0 & importdemand != .
* Exclude Greenland
drop if iso2 == "GL"

collapse (sum) vrd, by(iso2 h0_code)

drop if h0_code == ""
gen iso1 = substr(iso2, 1,1)
drop if iso1 == "Q" & iso2 != "QA"
gsort - vrd
gen selected = _n<= 1000
keep if selected == 1
keep iso2 h0_code
sort iso2 h0_code
save countryproducts_prio1000, replace


****************************************************
*** Construct Bartik shock for 1000 top destinations
****************************************************
cd "${data_dir}"
use "tradedata.dta", clear 

rename land iso2
collapse (sum) vgt mgd vrd, by(aar cvrnr ie iso2 vare)

label var vgt "weight"
label var mgd "units"
label var vrd "value DKK"

**************************************************************
* Next, add HS1988/92 concordance, aggregate data to 6-digits
**************************************************************

gen productcode = substr(vare,1,6)
sort aar productcode
merge n:1 aar productcode using firmtrade_HS1986_table
tab aar _merge
keep if _merge == 3
drop _merge

keep aar cvrnr ie iso2 h0_code vrd vgt
order aar cvrnr ie iso2 h0_code vrd vgt
destring aar, replace
drop if aar < 1998
drop if aar > 2008
tostring aar, replace

collapse (sum) vrd vgt, by(aar cvrnr ie iso2 h0_code)

merge n:1 iso2 h0_code using countryproducts_prio1000
gen priority =_merge == 3
drop _merge

collapse (sum) vrd vgt, by(aar cvrnr ie iso2 h0_code priority)

*****************************************************
* Find the presample:
*****************************************************

* generate pre-sample trade flows for year 1998 or first year observed

* determine first exporting and importing year for each firm:
sort cvrnr ie aar
by cvrnr ie: gen help_first = aar if _n==1
destring help_first, replace
by cvrnr ie: egen first = max(help_first)
* similarly for the last year before exiting
by cvrnr ie: gen help_last = aar if _n==_N
destring help_last, replace
by cvrnr ie: egen last = max(help_last)
tostring first, replace
* Presample = First Year
keep if aar == first
tab aar

keep if priority == 1
reshape wide vrd vgt, i(cvrnr ie first last iso2 h0_code) j(aar) string
reshape long vrd vgt, i(cvrnr ie first last iso2 h0_code) j(aar) string

destring first, replace
destring aar, replace
bys cvrnr ie: drop if aar < first
bys cvrnr ie: drop if aar > last

* Compute trade shares for presample period and save for all years
bys cvrnr ie iso2 h0_code: egen help_vrd = max(vrd)
bys cvrnr ie iso2 h0_code: egen help_vgt = max(vgt)
bys aar cvrnr ie: egen totaltrade = total(help_vrd)
gen share = help_vrd/totaltrade
replace vrd = help_vrd
replace vgt = help_vgt

tostring aar, replace
drop if aar == "1998" 

save firmtrade_presample_prio1000, replace

use firmtrade_presample_prio1000, clear

* exports: find world import demand
keep if ie == "2"
keep aar iso2 h0_code
bys aar iso2 h0_code: keep if _n == 1
save firmtrade_presample_prio1000_exports, replace

**************************************
* Prepare UN Comtrade data on WID
**************************************

use Imports_Comtrade, clear

merge n:1 iso2 h0_code using countryproducts_prio1000
keep if _merge == 3
drop _merge

collapse (sum) importdemand, by(aar iso2 h0_code)

save Imports_prio1000, replace


merge 1:1 aar iso2 h0_code using firmtrade_presample_prio1000_exports
tab aar _merge

* How many countries are affected each year?
preserve 
keep if _merge == 2
tab aar if h0_code == ""
drop if h0_code == ""
keep aar iso2 h0_code
gen hs4 = substr(h0_code,1,4)
bys aar iso2 hs4: keep if _n == 1
save missing_firmtrade4, replace
restore

keep if _merge == 3
drop _merge
keep aar iso2 h0_code importdemand
sort aar iso2 h0_code
save found_firmtrade6, replace

* aggregate the missing ones to 4 digit, then search again:
use Imports_prio1000, clear

gen hs4 = substr(h0_code, 1,4)
collapse (mean) importdemand, by(aar iso2 hs4)

merge 1:1 aar iso2 hs4 using missing_firmtrade4

preserve
keep if _merge == 3
drop _merge
keep aar iso2 h0_code importdemand
sort aar iso2 h0_code
save found_firmtrade4, replace
restore

preserve 
keep if _merge ==2 
drop _merge
keep aar iso2 h0_code
gen hs2 = substr(h0_code,1,2)
bys aar iso2 hs2: keep if _n == 1
save missing_firmtrade2, replace
restore

* aggregate the missing ones to 2 digit, then search again:
use Imports_prio1000, clear

gen hs2 = substr(h0_code, 1,2)
collapse (mean) importdemand, by(aar iso2 hs2)

merge 1:1 aar iso2 hs2 using missing_firmtrade2

preserve
keep if _merge == 3
drop _merge
keep aar iso2 h0_code importdemand
sort aar iso2 h0_code
save found_firmtrade2, replace
restore

* If not all products have been found for some country, then we will not use trade flows to this country at all to construct WID
use found_firmtrade6, clear
append using found_firmtrade4
append using found_firmtrade2

tab aar if h0_code == ""
drop if h0_code == ""

keep aar iso2 h0_code importdemand
order aar iso2 h0_code importdemand
sort aar iso2 h0_code
save importdemand_firmtrade_prio1000, replace


****************************************************
*** Combine presample and instruments:
****************************************************
use firmtrade_presample_prio1000, clear

merge n:1 aar iso2 h0_code using importdemand_firmtrade_prio1000
tab aar _merge
tab aar if h0_code == ""
drop _merge
replace importdemand = . if ie == "1"

* save pre-collapse data for later use 
save firmdata_instruments_presample_prio1000, replace

drop if cvrnr == ""
rename iso2 land 
gen h0_hs4 = substr(h0_code,1,4)

* drop initial year:
tostring first, replace
drop if aar == first
drop if ie == ""

* WID
***************************
bys cvrnr ie aar: gen evalf = _n==1
bys cvrnr ie: egen years = total(evalf)

* now we need product-level shares that exist through all periods
* determine countries for which WID exists for all periods:
tab aar if importdemand < 0
replace importdemand = 0 if importdemand < 0
gen countID = importdemand != .
bys cvrnr ie land h0_code: egen yearsID = total(countID)
gen share_help = share if years == yearsID & yearsID != . & yearsID != 0
bys cvrnr ie aar: egen totalshare = total(share_help)
gen vareshare_WID = share_help / totalshare if ie == "2"

gen weighted_WID = vareshare_WID*log(importdemand)
by cvrnr aar: egen I_WID = total(weighted_WID)
sum I_WID if evalf == 1, det

* Save firm-level instruments
***************************************************
keep if evalf == 1
keep I_WID cvrnr aar ie

foreach x in I_WID {
replace `x' = . if `x' == 0
}

sort aar cvrnr
keep if ie== "2"
keep cvrnr aar I_WID

rename I_WID I_WID_new
keep cvrnr aar I_WID_new

save firmsample_prio1000shock, replace

***************************************************
* Save main file for Bartik sensitivity tests
***************************************************
use hierarchy_IV_data.dta, clear

merge 1:1 cvrnr aar using firmsample_prio1000shock
drop if _merge == 2
drop _merge

destring aar, replace
xtset firmcode aar

keep cvrnr aar logsales I_WID_new t sector009 sample firmcode sector027 
rename I_WID_new shock

save firmsample_prio1000_Bartik_diagnostics, replace


************************************************************
* Save supplementary files for Bartik sensitivity tests 
************************************************************

set more off
use firmdata_instruments_presample_prio1000, clear
drop if cvrnr == ""

rename iso2 land 
gen h0_hs4 = substr(h0_code,1,4)

egen id_proddest = group(land h0_code)
sum id_proddest

* drop initial year:
tostring first, replace
drop if aar == first
drop if ie == ""

*** Compute firm-level shocks
**************************************************** 
bys cvrnr ie aar: gen evalf = _n==1
bys cvrnr ie: egen years = total(evalf)

* now we need product-level shares that exist through all periods
* determine countries for which WID exists for all periods:
tab aar if importdemand < 0
replace importdemand = 0 if importdemand < 0
gen countID = importdemand != .
bys cvrnr ie land h0_code: egen yearsID = total(countID)
gen share_help = share if years == yearsID & yearsID != . & yearsID != 0

bys cvrnr ie aar: egen totalshare = total(share_help)
gen vareshare_WID = share_help / totalshare if ie == "2"

* Create a list of Bartik weights by firm, product and destination
***********************************************************************
preserve
keep if ie == "2" 
keep cvrnr id_proddest vareshare_WID

sort cvrnr id_proddest
by cvrnr id_proddest: keep if _n==1

reshape wide vareshare_WID, i(cvrnr) j(id_proddest)
reshape long vareshare_WID, i(cvrnr) j(id_proddest)

replace vareshare_WID = 0 if vareshare_WID == .

save firmsample_prio1000_productdest_shares, replace
restore

* Create a list of Bartik shocks by year, product and destination
***********************************************************************
preserve
keep if ie == "2" 
keep aar id_proddest importdemand

replace importdemand = log(importdemand)

sort aar id_proddest
by aar id_proddest: keep if _n==1

reshape wide importdemand, i(aar) j(id_proddest)
reshape long importdemand, i(aar) j(id_proddest)

replace importdemand = 0 if importdemand == .

save firmsample_prio1000_productdest_importdemand, replace
restore

